Documento de análise exploratória de dados em R
# Load de variáveis e de bibliotecas
library(readxl)
library(tidyverse)
library(dplyr)
library(stringr)
library(lubridate)
library(janitor)
library(zoo)
library(tsbox)
library(forecast)
library(plotly)
library(xts) # lib para time series
library(spotifyr)
(A) Visualização básica de dados
# Load the rates of change; the file is expected to provide the `us_change`
# object used below.
load(file = "us_change.rda")

# Working copy that will hold level indices instead of rates of change.
data_nivel <- us_change

# Row used as the index base (= 100). The original note refers to the first
# quarter of 2000 -- TODO confirm that row 121 of `us_change` is that quarter.
base_row <- 121
stopifnot(nrow(data_nivel) >= base_row)

# Turn a vector of percentage changes into a level index worth `base_value`
# at position `base_pos`.
#
# The previous version wrote 100 into row 121 and then compounded from row 2
# onwards, so the 100 was consumed as a +100% growth rate and the first raw
# rate served as the starting level (a negative first rate flipped the sign of
# the whole series). Compounding with cumprod() and rescaling afterwards keeps
# every period-on-period ratio equal to 1 + rate / 100 and pins the base row
# at exactly `base_value`.
rate_to_index <- function(rate, base_pos, base_value = 100) {
  level <- cumprod(1 + rate / 100)
  level / level[base_pos] * base_value
}

# Apply the same conversion to every series, always reading the untouched
# rates from `us_change`.
for (series in c("Consumption", "Income", "Production", "Savings",
                 "Unemployment")) {
  data_nivel[[series]] <- rate_to_index(us_change[[series]], base_row)
}
(B) Correlação da taxa de variação, sem índice
# Correlation matrix of the raw rates of change: every column of `us_change`
# except `Quarter`, rounded to two decimals.
correl2 <- round(cor(select(us_change, -"Quarter")), 2)

# Plot only the upper triangle of the matrix, with black text labels.
plot2 <- corrplot::corrplot(correl2, type = "upper", tl.col = "black")
Conseguimos ver nesses gráficos de correlação a diferença entre usar índice ou apenas a taxa de variação para fazer a correlação. Usando um índice, você está normalizando os dados, fazendo com que a leitura da relação da informação fique padronizada.
(C) Plot de dispersão em linhas
# Reshape to long format: one row per (Quarter, series) pair, with the
# `Quarter` column exposed as `date`.
new_data_nivel <- data_nivel %>%
  pivot_longer(cols = -Quarter, names_to = "name", values_to = "value") %>%
  select(date = Quarter, name, value)

# One line per series; values are rounded to three decimals for plotting.
p <- ggplot(
  new_data_nivel,
  aes(x = date, y = round(value, 3), color = name)
) +
  geom_line() +
  theme_bw()

# Interactive version of the line chart.
ggplotly(p)
(C) Plot de dispersão em geom_point
# Scatter plot of every series in `new_data_nivel`, coloured by series name;
# values are rounded to three decimals for plotting.
p2 <- ggplot(
  new_data_nivel,
  aes(x = date, y = round(value, 3), color = name)
) +
  geom_point() +
  theme_bw()

# Interactive version of the scatter plot.
ggplotly(p2)
(D) É possível observar que sempre existe uma relação entre esses dados. No gráfico da correlação, é possível tirar os dados de crescimento e queda das variáveis e a relação entre elas, já na evolução das variáveis no tempo, além de se obter os valores de queda e crescimento, é possível acompanhar o momento em que essa transição ocorre.
(E) É possível visualizar que, a partir do ano 2000, a “produção” teve uma grande queda, e um fator que pode ter influenciado essa queda é o aumento da taxa de “desemprego” no mesmo ano. Mesmo que a renda tenha crescido a partir de 2000, a produção continuou em queda; por outro lado, o “consumo” aumentou a partir desse ano e as pessoas também começaram a poupar mais.
Séries de tempo, ciclo, sazonalidade e tendência (“retail.xlsx”)
# Read the sheet from a fixed cell range that starts on row 2, so the first
# row of the file is left out; reading that row as well garbles the data.
# Column names are then cleaned, the first column is renamed to `date`, and
# that column is converted with as_date().
data <- read_excel("retail.xlsx", range = "A2:GH383") %>%
  janitor::clean_names() %>%
  dplyr::rename(date = 1) %>%
  mutate(date = as_date(date))
Plotando a coluna “a3349335t”

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKYXV0aG9yOiBQZWRybyBIZW5yaXF1ZSBNb3JlaXJhIFBlcmVpcmEKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgRG9jdW1lbnRvIGRlIGFuw6FsaXNlIGV4cGxvcmF0b3JpYSBkZSBkYWRvcyBlbSBSCgpgYGB7cn0KIyBMb2FkIGRlIHZhcmnDoXZlaXMgZSBkZSBiaWJsaW90ZWNhcwpsaWJyYXJ5KHJlYWR4bCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoamFuaXRvcikKbGlicmFyeSh6b28pCmxpYnJhcnkodHNib3gpCmxpYnJhcnkoZm9yZWNhc3QpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KHh0cykgIyBsaWIgcGFyYSB0aW1lIHNlcmllcwpsaWJyYXJ5KHNwb3RpZnlyKQpgYGAKCiMjIyAoQSkgVmlzdWFsaXphw6fDo28gYsOhc2ljYSBkZSBkYWRvcwpgYGB7cn0KbG9hZChmaWxlID0gInVzX2NoYW5nZS5yZGEiKQoKZGF0YV9uaXZlbCA8LSB1c19jaGFuZ2UKCiMgQ2FsY3VsYW5kbyBhIGNvcnJlbGHDp8OjbyBkYSB0YXhhIGVudHJlIGFzIHZhcmnDoXZlaXMgZG8gcHJpbWVpcm8gdHJpbWVzdHJlIGRlIGRvIGFubyAyMDAwCgpkYXRhX25pdmVsJENvbnN1bXB0aW9uWzEyMV0gPC0gMTAwCmZvcihpIGluIDI6bGVuZ3RoKGRhdGFfbml2ZWwkQ29uc3VtcHRpb24pKXsKICBkYXRhX25pdmVsJENvbnN1bXB0aW9uW2ldIDwtICAoMSArIGRhdGFfbml2ZWwkQ29uc3VtcHRpb25baV0vMTAwKSAqIGRhdGFfbml2ZWwkQ29uc3VtcHRpb25baS0xXQp9CgpkYXRhX25pdmVsJEluY29tZVsxMjFdIDwtIDEwMApmb3IoaSBpbiAyOmxlbmd0aChkYXRhX25pdmVsJEluY29tZSkpewogIGRhdGFfbml2ZWwkSW5jb21lW2ldIDwtICAoMSArIGRhdGFfbml2ZWwkSW5jb21lW2ldLzEwMCkgKiBkYXRhX25pdmVsJEluY29tZVtpLTFdCn0KCmRhdGFfbml2ZWwkUHJvZHVjdGlvblsxMjFdIDwtIDEwMApmb3IoaSBpbiAyOmxlbmd0aChkYXRhX25pdmVsJFByb2R1Y3Rpb24pKXsKICBkYXRhX25pdmVsJFByb2R1Y3Rpb25baV0gPC0gICgxICsgZGF0YV9uaXZlbCRQcm9kdWN0aW9uW2ldLzEwMCkgKiBkYXRhX25pdmVsJFByb2R1Y3Rpb25baS0xXQp9CgpkYXRhX25pdmVsJFNhdmluZ3NbMTIxXSA8LSAxMDAKZm9yKGkgaW4gMjpsZW5ndGgoZGF0YV9uaXZlbCRTYXZpbmdzKSl7CiAgZGF0YV9uaXZlbCRTYXZpbmdzW2ldIDwtICAoMSArIGRhdGFfbml2ZWwkU2F2aW5nc1tpXS8xMDApICogZGF0YV9uaXZlbCRTYXZpbmdzW2ktMV0KfQoKZGF0YV9uaXZlbCRVbmVtcGxveW1lbnRbMTIxXSA8LSAxMDAKZm9yKGkgaW4gMjpsZW5ndGgoZGF0YV9uaXZlbCRVbmVtcGxveW1lbnQpKXsKICBkYXRhX25pdmVsJFVuZW1wbG95bWVudFtpXSA8LSAgKDEgKyBkYXRhX25pdmVsJFVuZW1wbG95bWVudFtpXS8xMDApICogZGF0YV9uaXZlbCRVbmVtcGxveW1lbnRbaS0xXQp9CgoKYGBgCiMjIyMgKEIpIENvcnJlbGHDp8OjbyBjb20gaW5kaWNl
IDEwMCBwYXJhIG8gcHJpbWVpcm8gdHJpbWVzdHJlIGRvIGFubyAyMDAwCgoKYGBge3J9CmNvcnJlbCA8LSAgY29yKGRhdGFfbml2ZWwgJT4lIAogICAgICBzZWxlY3QoLSJRdWFydGVyIikpICU+JSByb3VuZCgyKSAKCgoKcGxvdDEgPC0gY29ycnBsb3Q6OmNvcnJwbG90KGNvcnJlbCwgCiAgICAgICAgICAgICAgICAgICB0eXBlID0gInVwcGVyIiwKICAgICAgICAgICAgICAgICAgIHRsLmNvbCA9ICJibGFjayIsCiAgICAgICAgICAgICAgICAgICApCgpgYGAKCiMjIyMgKEIpIENvcnJlbGHDp8OjbyBkYSB0YXhhIGRlIHZhcmlhw6fDo28sIHNlbSDDrW5kaWNlCgpgYGB7cn0KY29ycmVsMiA8LSAgY29yKHVzX2NoYW5nZSAlPiUgCiAgICAgIHNlbGVjdCgtIlF1YXJ0ZXIiKSkgJT4lIHJvdW5kKDIpIAoKcGxvdDIgPC0gY29ycnBsb3Q6OmNvcnJwbG90KGNvcnJlbDIsIAogICAgICAgICAgICAgICAgICAgdHlwZSA9ICJ1cHBlciIsCiAgICAgICAgICAgICAgICAgICB0bC5jb2wgPSAiYmxhY2siLAogICAgICAgICAgICAgICAgICAgKQpgYGAKIyMjIyBDb25zZWd1aW1vcyB2ZXIgbmVzc2VzIGdyw6FmaWNvcyBkZSBjb3JyZWxhw6fDo28gYSBkaWZlcmVuw6dhIGVudHJlIHVzYXIgw61uZGljZSBvdSBhcGVuYXMgYSB0YXhhIGRlIHZhcmlhw6fDo28gcGFyYSBmYXplciBhIGNvcnJlbGHDp8Ojby4gVXNhbmRvIHVtIMOtbmRpY2UsIHZvY8OqIGVzdMOhIG5vcm1hbGl6YW5kbyBvcyBkYWRvcywgZmF6ZW5kbyBjb20gcXVlIGEgbGVpdHVyYSBkYSByZWxhw6fDo28gZGEgaW5mb3JtYcOnw6NvIGZpcXVlIHBhZHJvbml6YWRhLiAKCgoKCiMjIyAoQykgUGxvdCBkZSBkaXNwZXJzw6NvIGVtIGxpbmhhcwpgYGB7cn0KCm5ld19kYXRhX25pdmVsIDwtIGRhdGFfbml2ZWwgJT4lIAogIHBpdm90X2xvbmdlcigtUXVhcnRlcikgJT4lIAogIHNlbGVjdChkYXRlID0gUXVhcnRlciwgbmFtZSwgdmFsdWUpCgpwIDwtIG5ld19kYXRhX25pdmVsICU+JSAKICAjZmlsdGVyKCBuYW1lICVpbiUgYygicG1jIiwgInBpbV9uaXZlbCIpKSAlPiUgCiAgZ2dwbG90KCBhZXMoeCA9IGRhdGUsIHkgPSByb3VuZCh2YWx1ZSwzKSwgY29sb3IgPSBuYW1lKSkgKwogIGdlb21fbGluZSgpICsKICB0aGVtZV9idygpCmdncGxvdGx5KHApCmBgYAoKCiMjIyAoQykgUGxvdCBkZSBkaXNwZXJzw6NvIGVtIGdlb21fcG9pbnQKYGBge3J9CnAyIDwtIG5ld19kYXRhX25pdmVsICU+JSAKICAjZmlsdGVyKCBuYW1lICVpbiUgYygicG1jIiwgInBpbV9uaXZlbCIpKSAlPiUgCiAgZ2dwbG90KCBhZXMoeCA9IGRhdGUsIHkgPSByb3VuZCh2YWx1ZSwzKSwgY29sb3IgPSBuYW1lKSkgKwogIGdlb21fcG9pbnQoKSArCiAgdGhlbWVfYncoKQpnZ3Bsb3RseShwMikKYGBgCgojIyMjIChEKSDDiSBwb3Nzw612ZWwgb2JzZXJ2YXIgcXVlIHNlbXByZSBleGlzdGUgdW1hIHJlbGHDp8OjbyBlbnRyZSBlc3NlcyBkYWRvcy4gTm8gZ3LDoWZpY28gZGEgY29ycmVsYcOnw6NvLCDDqSBwb3Nzw612ZWwgdGly
YXIgb3MgZGFkb3MgZGUgY3Jlc2NpbWVudG8gZSBxdWVkYSBkYXMgdmFyacOhdmVpcyBlIGEgcmVsYcOnw6NvIGVudHJlIGVsYXMsIGrDoSBuYSBldm9sdcOnw6NvIGRhcyB2YXJpw6F2ZWlzIG5vIHRlbXBvLCBhbMOpbSBkZSBzZSBvYnRlciBvcyB2YWxvcmVzIGRlIHF1ZWRhIGUgY3Jlc2NpbWVudG8sIMOpIHBvc3PDrXZlbCBhY29tcGFuaGFyIG8gbW9tZW50byBlbSBxdWUgZXNzYSB0cmFuc2nDp8OjbyBvY29ycmUuIAoKIyMjIyAoRSkgw4kgcG9zc8OtdmVsIHZpc3VhbGl6YXIgcXVlLCBhIHBhcnRpciBkbyBhbm8gMjAwMCBhICJwcm9kdcOnw6NvIiB0ZXZlIHVtYSBncmFuZGUgcXVlZGEgZSB1bWEgY29pc2EgcXVlIHBvZGUgdGVyIGdlcmFkbyBlc3NhIGluZmx1ZW5jaWEgcG9kZSB0ZXIgc2lkbyBvIGF1bWVudG8gZGEgdGF4YSBkZSAiZGVzZW1wcmVnbyIgbm8gbWVzbW8gYW5vLiBNZXNtbyBxdWUgYSByZW5kYSB0ZW5oYSBjcmVzY2lkbyBhIHBhcnRpciBkZSAyMDAwIGEgcHJvZHXDp8OjbyBjb250aW51b3UgZW0gcXVlZGEsIG1hcyBwb3Igb3V0cm8gbGFkbyBvICJjb25zdW1vIiB0ZXZlIHVtIGF1bWVudG8gYSBwYXJ0aXIgZGVzc2UgYW5vIGUgYXMgcGVzc2FvcyBjb21lw6dhcmFtIGEgcG91cGFyIG1haXMgYSBwYXJ0aXIgZGVzc2UgYW5vIHRhbWLDqW0uCgoKIyMgU8OpcmllcyBkZSB0ZW1wbywgY2ljbG8sIHNhem9uYWxpZGFkZSBlIHRlbmTDqm5jaWEgKCJyZXRhaWwueGxzeCIpCgpgYGB7cn0KIyBsZW5kbyBhIGJhc2UgZGUgZGFkb3MsIHNlbGVjaW9hbmRvIHVtIHJhbmdlIHBhcmEgYSBiYXNlLCBleGNsdWlkbyBvIGNhYmXDp2FsaG8gZG8gYXJxdWl2bywgbGltcGFuZG8gb3Mgbm9tZXMgZGFzIGNvbHVuYXMgZSBtb2RpZmljYW5kbyBhIGNvbHVuYSBkZSAgInNlcmllc19pZCIgcGFyYSAiYXNfZGF0ZSgpIi4KZGF0YSA8LSByZWFkX2V4Y2VsKCJyZXRhaWwueGxzeCIsIHJhbmdlID0gIkEyOkdIMzgzIikgJT4lIAogIGphbml0b3I6OmNsZWFuX25hbWVzKCkgJT4lIAogIGRwbHlyOjpyZW5hbWUoZGF0ZSA9IGNvbG5hbWVzKC4pWzFdKSAlPiUgCiAgbXV0YXRlKGRhdGUgPSBhc19kYXRlKGRhdGUpKQoKIyDDiSBuZWNlc3PDoXJpbyBuw6NvIHNlbGVjaW9uYXIgbyBjYWJlw6dhbGhvIGRvIGFycXVpdm8sIHBvaXMgcXVhbmRvIHNlIHBlZ2EgbyBjYWJlw6dhbGhvIG9zIGRhZG9zIGZpY2FtIGJ1Z2Fkb3MKCmBgYAoKIyMjIyBDcmlhbmRvIHVtIG5vdm8gZGF0YWZyYW1lIGUgbyB0cmFuc2Zvcm1hbmRvIGVtIHRpbWUgc2VyaWVzCmBgYHtyfQojdGltZV9zZXJpZXNfZGYgPC0gdHNfdHModHNfbG9uZyhkYXRhKSkKZGF0YSRkYXRlIDwtIHltZChkYXRhJGRhdGUpCmRmX3RzIDwtIHh0cyh4ID0gZGF0YSwgb3JkZXIuYnkgPSBkYXRhJGRhdGUpCmBgYAoKIyMjIyBQbG90YW5kbyBhIGNvbHVuYSAiYTMzNDkzMzV0IiAKYGBge3J9CmRhdGEgJT4lCiAgICBnZ3Bsb3QoIGFlcyh4ID0gZGF0ZSwgeSA9IGEzMzQ5MzM1dCwgY29sb3IgPSBhMzM0
OTMzNXQpKSArCiAgICBnZW9tX2xpbmUoKQoKI2dnc2Vhc29ucGxvdCh4ID0gZGZfdHMpCmBgYAoK